# Clears work environment
rm(list = ls())

# Sets working directory
setwd('C:/Users/Jason/Box Sync/Home Folder jdt34/733 - Maximum Likelihood Estimation/Lai & Slater 2006 replication')

# Loads data and libraries
load(file='cleanedLSdata.RData')
loadPkg(c(packs, 'maps'))

# Plots distribution of dependent variable
ggdvdist = ggplot(data=lsm, aes(x=get(dv))) + 
  geom_density(fill='black') + 
  xlab('number of major MID initiations') +
  labs(title='Distribution of dependent variable') +
  scale_x_continuous(breaks=seq(0, 10, 1))
ggdvdist.overlay = ggplot(data=lsm, aes(x=get(dv), color=regime, fill=regime)) + 
  geom_density(alpha=.5) + 
  xlab('number of major MID initiations') +
  labs(title='Distribution of dependent variable by regime type') +
  scale_x_continuous(breaks=seq(0, 10, 1)) + 
  guides(fill=guide_legend(override.aes=list(alpha=1)))
ggdvdist.facet = ggplot(data=lsm, aes(x=get(dv), color=regime, fill=regime)) + 
  geom_density(alpha=.5) + 
  xlab('number of major MID initiations') +
  labs(title='Distribution of dependent variable by regime type') +
  scale_x_continuous(breaks=seq(0, 10, 1)) + 
  facet_wrap(~ regime, ncol=2) +
  guides(fill=guide_legend(override.aes=list(alpha=1)))

# Plots distribution of dependent variable over time
midyear = by(lsm$majorinitone, factor(lsm$year), sum, simplify=F)
midyear = do.call(rbind.data.frame, midyear)
midyear$year = as.numeric(rownames(midyear))
colnames(midyear)[1] = 'MIDs'
dvts = ggplot(data=midyear, aes(x=year, y=MIDs)) + 
  geom_line(size=2, color='#F8766D') +
  xlab('') +
  ylab('') +
  ylim(0, 30)
ggsave(file='dvts.pdf', dvts, width=9, height=1.5, units='in')

### Plots distribution of dependent variable over space

# Prepares summary of dependent variable; zeroes will be greyed out
midsum = by(lsm$majorinitone, factor(lsm$ccode), sum, simplify=F)
midsum = do.call(rbind.data.frame, midsum) 
midsum$id = rownames(midsum)
rownames(midsum) = NULL
colnames(midsum)[1] = 'MIDs'
midsum[which(midsum$MIDs==0), 1] = NA

# Prepares map with COW codes
map.dat = map_data("world")
map1 = unique(map.dat$region)
map2 = countrycode(map1, origin='country.name', destination='cown')
map3 = cbind(map1, map2)
map3[is.na(map3[,2]),]
map3[which(map3[,1]=='French Guiana'), 2] = 220
map3[which(map3[,1]=='Hawaii'), 2] = 2
map3[which(map3[,1]=='French Polynesia'), 2] = 220
map3[which(map3[,1]=='Cook Islands'), 2] = 920
map3[which(map3[,1]=='Saint-Barthelemy'), 2] = 220
map3[which(map3[,1]=='Greenland'), 2] = 390
map3[which(map3[,1]=='South Sandwich Islands'), 2] = 200
map3[which(map3[,1]=='Guadeloupe'), 2] = 220
map3[which(map3[,1]=='Anguilla'), 2] = 200
map3[which(map3[,1]=='Saint Kitts'), 2] = 60
map3[which(map3[,1]=='Montserrat'), 2] = 220
map3[which(map3[,1]=='Sardinia'), 2] = 325
map3[which(map3[,1]=='Sicily'), 2] = 325
map3[which(map3[,1]=='Sonsorol Island'), 2] = 986
map3[which(map3[,1]=='New Caledonia'), 2] = 220
map3[which(map3[,1]=='Turks and Caicos'), 2] = 200
map3[which(map3[,1]=='Tokelau'), 2] = 920
map3[which(map3[,1]=='Maug Island'), 2] = 2
map3[which(map3[,1]=='Pitcairn Islands'), 2] = 200
map3[which(map3[,1]=='Isle of Man'), 2] = 200
map3[which(map3[,1]=='Saint Eustatius'), 2] = 210
map3[which(map3[,1]=='California'), 2] = 2
map3[which(map3[,1]=='Andaman Islands'), 2] = 750
map3[which(map3[,1]=='Northern Mariana Islands'), 2] = 2
map3[which(map3[,1]=='Nevis'), 2] = 60
map3[which(map3[,1]=='Madeira Islands'), 2] = 235
map3[which(map3[,1]=='Sin Cowe Island'), 2] = 816
map3[which(map3[,1]=='Paracel Islands'), 2] = 710
map3[which(map3[,1]=='Azores'), 2] = 235
map3[which(map3[,1]=='Falkland Islands'), 2] = 200
map3[which(map3[,1]=='American Samoa'), 2] = 2
map3[which(map3[,1]=='Cayman Islands'), 2] = 200
map3[which(map3[,1]=='Virgin Islands'), 2] = 200
map3[which(map3[,1]=='Canary Islands'), 2] = 230
map3[which(map3[,1]=='Barbuda'), 2] = 58
map3[which(map3[,1]=='Puerto Rico'), 2] = 2
map3[which(map3[,1]=='Chagos Archipelago'), 2] = 200
map3[which(map3[,1]=='Spratly Island'), 2] = 2
map3[which(map3[,1]=='Wales'), 2] = 200
map3[which(map3[,1]=='Bonaire'), 2] = 210
map3[which(map3[,1]=='Aruba'), 2] = 210
map3[which(map3[,1]=='Martinique'), 2] = 220
map3[which(map3[,1]=='Irian Jaya'), 2] = 850
map3[which(map3[,1]=='Isle of Wight'), 2] = 200
map3[which(map3[,1]=='Saint-Martin'), 2] = 200
map3[which(map3[,1]=='Curacao'), 2] = 210

# Attaches MID data to map
map.dat$ccode = NA
for(i in 1:nrow(map3)) {
  map.dat$ccode[which(map.dat$region==map3[i,1])] = map3[i,2]
}
unique(map.dat$region[is.na(map.dat$ccode)])
map.dat$ccode = as.numeric(map.dat$ccode)
map.dat$mids = NA
for(i in 1:nrow(midsum)) {
  map.dat$mids[which(map.dat$ccode==midsum[i,2])] = midsum[i,1]
}

# Eliminates Antarctica and bodies of water from map
allRegions = unique(map.dat$region)
noccode = unique(map.dat$region[is.na(map.dat$ccode)])
discardName = setdiff(seq(1, length(noccode), 1), c(8:10, 17))
keepName = setdiff(allRegions, noccode[discardName])
map.dat = map.dat[which(map.dat$region %in% keepName),]

# Plots map
dvmap = ggplot(map.dat, aes(x=long, y=lat, group=group, fill=mids)) + 
  geom_polygon() + 
  scale_fill_gradient(low='#FFFFFF', high='#F8766D', limits=c(0, 60), breaks=seq(0, 60, 10)) +
  theme(panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        legend.key.width=unit(4, 'cm'),
        legend.position='top',
        legend.title=element_blank())
ggsave(file='dvmap.pdf', dvmap, width=9, height=5, units='in')

# Calculates statistics mentioned in the panel
totalMIDs = sum(lsm$majorinitone)
totalCYs = length(which(lsm$majorinitone>0))
pcZeroOne = length(which(lsm$majorinitone<2)) / nrow(lsm)

save(file='panel1_data.RData', ggdvdist, ggdvdist.overlay, ggdvdist.facet, midyear, dvts, 
     midsum, map.dat, map3, dvmap, totalMIDs, totalCYs, pcZeroOne)